import ast
from flask import Blueprint, request
from scipy.stats import spearmanr, chi2, chisquare, stats, chi2_contingency, linregress
import statsmodels.api as sm
from scipy.stats import kendalltau
import numpy as np
import scipy.stats as stats

inferentialStatistics_api=Blueprint('inferentialStatistics_api',__name__)

# 推论统计
# ---------------------------------------------------------------------------------

@inferentialStatistics_api.route("/corelation/calProductMoment",methods=["POST"])
def calculateCovariance():
    """
    Compute the product-moment (Pearson) correlation coefficient of two arrays.

    Reads the form fields ``x`` and ``y``; each is a string containing a
    Python literal list of numbers (for example ``"[1, 2, 3]"``).

    :return: ``{"productMoment": r}`` on success, or ``({"error": msg}, 400)``
        when the input cannot be parsed, the lengths differ, or either array
        has no variation (the coefficient is undefined in that case).
    """
    invalid_input = ({"error": "参数 x 和 y 必须是数值数组"}, 400)

    # The form values arrive as strings; turn them into Python sequences.
    try:
        x = ast.literal_eval(request.form.get("x"))
        y = ast.literal_eval(request.form.get("y"))
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if not isinstance(x, (list, tuple)) or not isinstance(y, (list, tuple)):
        return invalid_input
    if not all(isinstance(v, (int, float)) for v in (*x, *y)):
        return invalid_input

    # Explicit check instead of ``assert``, which is removed under ``python -O``.
    n = len(x)
    if n != len(y):
        return {"error": "数组 x 和 y 的长度必须相等"}, 400

    sum_x = sum(x)
    sum_y = sum(y)
    sum_xy = sum(xi * yi for xi, yi in zip(x, y))
    sum_x_squared = sum(xi ** 2 for xi in x)
    sum_y_squared = sum(yi ** 2 for yi in y)

    numerator = n * sum_xy - sum_x * sum_y
    spread_x = n * sum_x_squared - sum_x ** 2
    spread_y = n * sum_y_squared - sum_y ** 2

    # Both spreads must be strictly positive: zero means a constant (or empty)
    # array, and a negative rounding residue would make ``** 0.5`` return a
    # complex number. Written as ``not (... > 0)`` so NaN is rejected as well.
    if not (spread_x > 0 and spread_y > 0):
        return {"error": "数组 x 或 y 没有变异（所有取值相同或为空），相关系数无定义"}, 400

    r = numerator / (spread_x * spread_y) ** 0.5

    return {"productMoment": r}
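# Example:
# arr1 = [1, 2, 3, 4, 5]
# arr2 = [6, 7, 8, 9, 10]
# Output: 1.0
# Explanation: arr1 and arr2 are perfectly linearly related, so the product-moment coefficient is 1.0.
# With non-integer data, floating-point rounding can produce values such as 0.9999999999999999 instead.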

@inferentialStatistics_api.route("/corelation/calRank",methods=["POST"])
def calculateRankCorrelation():
    """
    Compute a rank correlation coefficient together with the ranks behind it.

    Form fields:
        x, y: strings containing Python literal lists of numbers, equal length.
        parameterType: ``"spearman"`` or ``"kendall"``.

    :return: for ``spearman`` a dict with ``spearman_coeff``, ``rank_x``,
        ``rank_y`` and ``rank_diff``; for ``kendall`` the same keys with
        ``kendall_coeff`` in place of ``spearman_coeff``. Ranks are zero-based.
        An unknown ``parameterType`` yields a plain error string; unusable
        arrays yield ``({"error": msg}, 400)``.
    """
    parameterType = request.form.get("parameterType")
    if parameterType not in ("spearman", "kendall"):
        return "参数错误，请检查参数拼写"

    invalid_input = ({"error": "参数 x 和 y 必须是数值数组"}, 400)

    # The form values arrive as strings; turn them into Python sequences.
    try:
        x = ast.literal_eval(request.form.get("x"))
        y = ast.literal_eval(request.form.get("y"))
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if not isinstance(x, (list, tuple)) or not isinstance(y, (list, tuple)):
        return invalid_input
    if not all(isinstance(v, (int, float)) for v in (*x, *y)):
        return invalid_input

    n = len(x)
    # Without this check a length-1 array would silently broadcast against the
    # other one in the Spearman branch and produce a meaningless coefficient.
    if n != len(y):
        return {"error": "数组 x 和 y 的长度必须相等"}, 400
    # n * (n ** 2 - 1) is zero for n < 2, and Kendall's tau is undefined too.
    if n < 2:
        return {"error": "至少需要两对观测值"}, 400

    if parameterType == "spearman":
        # Double argsort converts values into zero-based ordinal ranks.
        # NOTE(review): tied values receive distinct ranks in sort order, so
        # the simplified formula below is only exact for tie-free data.
        rank_x = np.argsort(np.argsort(x))
        rank_y = np.argsort(np.argsort(y))

        # Per-observation rank differences.
        rank_diff = rank_x - rank_y

        # Spearman coefficient: 1 - 6 * sum(d^2) / (n * (n^2 - 1)).
        spearman_coeff = 1 - (6 * np.sum(rank_diff ** 2)) / (n * (n ** 2 - 1))

        return {"spearman_coeff": float(spearman_coeff),
                "rank_x": rank_x.tolist(),
                "rank_y": rank_y.tolist(),
                "rank_diff": rank_diff.tolist()
                }

    # parameterType == "kendall"
    coef, _ = kendalltau(x, y)
    # kendalltau reports NaN when a coefficient cannot be computed (for
    # example a constant array); NaN is not valid JSON, so report it instead.
    if np.isnan(coef):
        return {"error": "数组 x 或 y 的取值完全相同，相关系数无定义"}, 400

    # Dense zero-based ranks: equal values share the same rank.
    order_x = {val: i for i, val in enumerate(sorted(set(x)))}
    order_y = {val: i for i, val in enumerate(sorted(set(y)))}
    ranked_x = [order_x[val] for val in x]
    ranked_y = [order_y[val] for val in y]
    # Per-observation rank differences.
    diff = [rx - ry for rx, ry in zip(ranked_x, ranked_y)]

    return {"kendall_coeff": float(coef),
            "rank_x": ranked_x,
            "rank_y": ranked_y,
            "rank_diff": diff
            }

# Example (form fields sent with the POST request)
# x = [5, 1, 4, 3, 2]
# y = [4, 2, 5, 1, 3]
# parameterType = spearman
# The response is a dict with the keys spearman_coeff, rank_x, rank_y and rank_diff.

@inferentialStatistics_api.route("/test/anova/checkOneWay",methods=["POST"])
def calculateOneWayANOVA():
    """
    Run a one-way ANOVA on two groups and return F with its building blocks.

    Reads the form fields ``x`` and ``y``; each is a string containing a
    Python literal list of numbers (one list per group).

    :return: dict with ``f_value``, the group means (``mean1``, ``mean2``),
        the per-group sums of squared deviations (``ss1``, ``ss2``), the
        total, within-group and between-group sums of squares (``sst``,
        ``ssw``, ``ssb``), the degrees of freedom (``dfb``, ``dfw``) and the
        mean squares (``msb``, ``msw``); or ``({"error": msg}, 400)`` when the
        input is unusable or F is undefined.
    """
    invalid_input = ({"error": "参数 x 和 y 必须是数值数组"}, 400)

    # The form values arrive as strings; turn them into Python sequences.
    try:
        data1 = ast.literal_eval(request.form.get("x"))
        data2 = ast.literal_eval(request.form.get("y"))
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if not isinstance(data1, (list, tuple)) or not isinstance(data2, (list, tuple)):
        return invalid_input
    if not all(isinstance(v, (int, float)) for v in (*data1, *data2)):
        return invalid_input

    n1 = len(data1)
    n2 = len(data2)
    # Each mean needs at least one value, and dfw = n1 + n2 - 2 must be > 0.
    if n1 == 0 or n2 == 0 or n1 + n2 <= 2:
        return {"error": "每组至少需要一个观测值，且总样本量必须大于 2"}, 400

    # Group means and the grand mean of the pooled data.
    mean1 = sum(data1) / n1
    mean2 = sum(data2) / n2
    grand_mean = (sum(data1) + sum(data2)) / (n1 + n2)

    # Sum of squared deviations of each group around its own mean.
    ss1 = sum((value - mean1) ** 2 for value in data1)
    ss2 = sum((value - mean2) ** 2 for value in data2)

    # Within-group sum of squares.
    ssw = ss1 + ss2

    # Between-group sum of squares: each group mean's squared distance from
    # the grand mean, weighted by that group's size.
    ssb = n1 * (mean1 - grand_mean) ** 2 + n2 * (mean2 - grand_mean) ** 2

    # Total sum of squares decomposes into between + within.
    sst = ssb + ssw

    # Degrees of freedom: (groups - 1) and (observations - groups).
    dfb = 1
    dfw = n1 + n2 - 2

    # Mean squares.
    msb = ssb / dfb
    msw = ssw / dfw

    # With no within-group variation the F ratio is undefined.
    if not msw > 0:
        return {"error": "组内方差为 0，无法计算 F 值"}, 400

    f_value = msb / msw

    return {"f_value": f_value,
            "mean1": mean1,
            "mean2": mean2,
            "ss1": ss1,
            "ss2": ss2,
            "sst": sst,
            "ssw": ssw,
            "ssb": ssb,
            "dfb": dfb,
            "dfw": dfw,
            "msb": msb,
            "msw": msw
            }
# 案例
# import random
# # 生成两组随机数据，均值相等
# data1 = [random.randint(0, 10) for _ in range(10)]
# data2 = [random.randint(0, 10) for _ in range(10)]

@inferentialStatistics_api.route("/test/anova/nkCheck",methods=["POST"])
def nKTestPostHocAnalysis():
    """
    Compare two samples and return several statistics plus a verdict string.

    Reads the form fields ``x`` and ``y``; each is a string containing a
    Python literal list of numbers.

    :return: dict with the variance-ratio F statistic and its p-value, the
        ``T2`` and ``H`` statistics, the p-value of ``H`` and a conclusion
        string (the dictionary keys are in Chinese).

    NOTE(review): the verdict about the sample means is gated by the p-value
    of the variance ratio ``var1 / var2``; confirm that this is intended.
    """
    # The form values arrive as strings; turn them into Python sequences.
    group_a = ast.literal_eval(request.form.get("x"))
    group_b = ast.literal_eval(request.form.get("y"))

    # Sample sizes, means and unbiased (ddof=1) variances.
    size_a, size_b = len(group_a), len(group_b)
    avg_a, avg_b = np.mean(group_a), np.mean(group_b)
    spread_a = np.var(group_a, ddof=1)
    spread_b = np.var(group_b, ddof=1)

    # Variance-ratio F statistic and its upper-tail probability.
    variance_ratio = spread_a / spread_b
    dof_a = size_a - 1
    dof_b = size_b - 1
    ratio_p = 1 - stats.f.cdf(variance_ratio, dof_a, dof_b)

    # T2 statistic, assembled from named pieces in the original evaluation order.
    size_weight = (size_a * size_b) / (size_a + size_b)
    mean_gap_squared = (avg_a - avg_b) ** 2
    scale = (1 / size_a + 1 / size_b) * (spread_a / size_a + spread_b / size_b)
    t2 = size_weight * mean_gap_squared / scale

    # H statistic and its upper-tail probability under F(1, dof_a + dof_b).
    h = t2 * (1 / spread_a + 1 / spread_b)
    h_p = 1 - stats.f.cdf(h, 1, dof_a + dof_b)

    # Build the verdict; the 0.05 threshold is applied to both p-values.
    if ratio_p < 0.05:
        suffix = "，且存在显著的差异" if h_p < 0.05 else "，但不存在显著的差异"
        conclusion = "拒绝原假设，两个样本均值不相等" + suffix
    else:
        conclusion = "接受原假设，两个样本均值相等"

    return {
        "F统计量": variance_ratio,
        "F p值": ratio_p,
        "T2统计量": t2,
        "H统计量": h,
        "H p值": h_p,
        "结论": conclusion
    }
# group1 = [20, 30, 25, 35, 28]
# group2 = [15, 25, 20, 30, 18]
# nKTestPostHocAnalysis(group1, group2)

@inferentialStatistics_api.route("/test/chisquare/checkEqual",methods=["POST"])
def chi_squared_homogeneity():
    """
    Run a chi-square test on a two-way table of observed frequencies.

    Expected frequencies are derived from the margins of the observed table
    (row total * column total / grand total). The p-value is evaluated with
    (rows - 1) * (columns - 1) degrees of freedom, the same value that is
    reported in the response.

    Form fields:
        observed: string containing a Python literal 2-D list of counts.
        expected: not used; the expected table is always computed from
            ``observed``.

    :return: dict with ``statistic``, ``p_value``, ``degrees_of_freedom`` and
        ``expected`` (nested list), or ``({"error": msg}, 400)`` for an
        unusable table.

    NOTE(review): this endpoint was labelled a goodness-of-fit test, but the
    caller-supplied expected table has never influenced the result; confirm
    which behaviour is wanted.
    """
    invalid_input = ({"error": "参数 observed 必须是至少 2 行 2 列的非负数值二维数组"}, 400)

    # dtype=float rejects ragged or non-numeric input with a ValueError.
    try:
        observed = np.array(ast.literal_eval(request.form.get("observed")), dtype=float)
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if observed.ndim != 2 or min(observed.shape) < 2 or (observed < 0).any():
        return invalid_input

    # Margins of the observed table.
    total_observed = observed.sum()
    row_sums = observed.sum(axis=1, keepdims=True)
    col_sums = observed.sum(axis=0, keepdims=True)
    # An empty row or column gives a zero expected count and a division by zero.
    if not ((row_sums > 0).all() and (col_sums > 0).all()):
        return {"error": "观测频数表的每一行和每一列的合计都必须大于 0"}, 400

    # Expected count of each cell from its row and column totals.
    expected = row_sums * col_sums / total_observed

    num_rows, num_cols = observed.shape
    degrees_of_freedom = (num_rows - 1) * (num_cols - 1)

    # chisquare uses k - 1 - ddof degrees of freedom (k = number of cells);
    # choose ddof so the p-value uses the degrees of freedom reported below.
    ddof = observed.size - 1 - degrees_of_freedom
    statistic, p_value = chisquare(observed.flatten(), expected.flatten(), ddof=ddof)

    return {"statistic": float(statistic),
            "p_value": float(p_value),
            "degrees_of_freedom": degrees_of_freedom,
            "expected": expected.tolist()}
# # 测试
# observed = np.array([[3, 5, 2], [6, 3, 1], [2, 4, 3]])
# expected = np.array([[2, 4, 4], [4, 4, 2], [3, 3, 3]])

@inferentialStatistics_api.route("/test/chisquare/checkIsolation",methods=["POST"])
def chisquare_independence_test():
    """
    Run a chi-square test of independence and describe the outcome as text.

    Reads the form field ``observed``, a string containing a Python literal
    table of observed frequencies, and passes it to ``chi2_contingency``.

    :return: a multi-line string with the statistic, the p-value, the degrees
        of freedom, the expected frequencies and a conclusion based on the
        0.05 significance level.
    """
    observed = np.array(ast.literal_eval(request.form.get("observed")))

    # Chi-square independence test on the observed table.
    statistic, p_value, dof, expected = chi2_contingency(observed)

    # Report lines; each piece carries its own trailing newline.
    parts = [
        f"卡方值为：{statistic:.2f}\n",
        f"p 值为：{p_value:.4f}\n",
        f"自由度为：{dof}\n",
        f"期望值为：\n{expected}\n",
    ]

    # The conclusion depends on whether p falls below 0.05.
    verdict = (
        "结论：拒绝原假设，两个变量不独立"
        if p_value < 0.05
        else "结论：接受原假设，两个变量独立"
    )
    parts.append(verdict)

    return "".join(parts)

# 测试
# observed = np.array([[10, 20, 30],
#                      [20, 30, 40],
#                      [30, 40, 50]])
@inferentialStatistics_api.route("/test/chisquare/checkIsLike",methods=["POST"])
def chisq_homogeneity_test():
    """
    Chi-square test of homogeneity on a two-way table of observed frequencies.

    Reads the form field ``observed``, a string containing a Python literal
    2-D list of counts.

    :return: dict with ``p_value``, ``expected`` (expected frequency matrix as
        a nested list) and ``chisq_statistic``, or ``({"error": msg}, 400)``
        for an unusable table.
    """
    invalid_input = ({"error": "参数 observed 必须是至少 2 行 2 列的非负数值二维数组"}, 400)

    # dtype=float rejects ragged or non-numeric input with a ValueError.
    try:
        obs = np.array(ast.literal_eval(request.form.get("observed")), dtype=float)
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    # Fewer than 2 rows or columns leaves zero degrees of freedom.
    if obs.ndim != 2 or min(obs.shape) < 2 or (obs < 0).any():
        return invalid_input

    # Row and column totals.
    row_sum = np.sum(obs, axis=1)
    col_sum = np.sum(obs, axis=0)
    # An empty row or column gives a zero expected count and a division by zero.
    if not ((row_sum > 0).all() and (col_sum > 0).all()):
        return {"error": "观测频数表的每一行和每一列的合计都必须大于 0"}, 400

    # Grand total.
    total = np.sum(row_sum)

    # Expected frequency of each cell: row total * column total / grand total.
    expected = np.outer(row_sum, col_sum) / total

    # Pearson chi-square statistic.
    chisq_statistic = np.sum((obs - expected) ** 2 / expected)

    # Degrees of freedom of an r x c table.
    df = (obs.shape[0] - 1) * (obs.shape[1] - 1)

    # Survival function rather than 1 - cdf, which rounds to exactly 0 for
    # large statistics.
    p_value = chi2.sf(chisq_statistic, df)
    return {"p_value": float(p_value),
            "expected": expected.tolist(),
            "chisq_statistic": float(chisq_statistic)}

# Test
# obs = np.array([[30, 40, 30],
#                 [60, 50, 70]])
# p-value: approximately 0.0987
# Expected frequency matrix:
# [[32.14285714 32.14285714 35.71428571]
#  [57.85714286 57.85714286 64.28571429]]
# Chi-square statistic: approximately 4.6321

@inferentialStatistics_api.route("/test/linearRegrssion/check",methods=["POST"])
def linearRegressionAnalysisDetection():
    """
    Fit a linear (degree 1) or quadratic (degree 2) regression of y on x.

    Form fields:
        x: string containing a Python literal list of predictor values.
        y: string containing a Python literal list of response values.
        degree: ``"1"`` for a straight line, ``"2"`` for a quadratic.

    Returns:
        degree 1: dict with ``slope``, ``p_value`` and ``equation``.
        degree 2: dict with ``beta_0``, ``beta_1``, ``beta_2``, ``reg_eqn``,
            ``f_value`` and ``p_value``.
        ``({"error": msg}, 400)`` for an unsupported degree or unusable data.
    """
    degree = request.form.get("degree")
    # Any other value used to fall through and return None, which Flask
    # rejects as an invalid response.
    if degree not in ("1", "2"):
        return {"error": "参数 degree 只能是 1 或 2"}, 400

    invalid_input = ({"error": "参数 x 和 y 必须是数值数组"}, 400)

    # The form values arrive as strings; turn them into Python sequences.
    try:
        x = ast.literal_eval(request.form.get("x"))
        y = ast.literal_eval(request.form.get("y"))
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if not isinstance(x, (list, tuple)) or not isinstance(y, (list, tuple)):
        return invalid_input
    if not all(isinstance(v, (int, float)) for v in (*x, *y)):
        return invalid_input
    if len(x) != len(y):
        return {"error": "数组 x 和 y 的长度必须相等"}, 400

    # A polynomial of degree d needs at least d + 1 distinct x values;
    # otherwise the fit is not identifiable.
    if len(set(x)) <= int(degree):
        return {"error": "x 中不同取值的个数不足，无法拟合回归方程"}, 400

    if degree == '1':
        fit = linregress(x, y)
        slope = float(fit.slope)
        intercept = float(fit.intercept)
        equation = f'y = {slope:.2f}x + {intercept:.2f}'
        return {"slope": slope, "p_value": float(fit.pvalue), "equation": equation}

    # degree == '2': ordinary least squares on the columns [1, x, x^2].
    x = np.array(x)
    y = np.array(y)
    x_squared = x ** 2
    X = sm.add_constant(np.column_stack((x, x_squared)))
    results = sm.OLS(y, X).fit()
    beta_0 = float(results.params[0])
    beta_1 = float(results.params[1])
    beta_2 = float(results.params[2])
    reg_eqn = f'y = {beta_0:.3f} + {beta_1:.3f}x + {beta_2:.3f}x^2'
    # Overall F test of the fitted model.
    f_value = results.fvalue
    p_value = results.f_pvalue
    return {"beta_0": beta_0, "beta_1": beta_1, "beta_2": beta_2, "reg_eqn": reg_eqn, "f_value": f_value, "p_value": p_value}
# 测试案例
# x = [1, 2, 3, 4, 5]
# y = [2, 4, 5, 4, 6]


@inferentialStatistics_api.route("/test/linearRegrssion/predict",methods=["POST"])
def linearRegressionAnalysisPrediction():
    """
    Fit a least-squares line to (x, y) and predict y for a new x value.

    Form fields:
        x: string containing a Python literal list of predictor values.
        y: string containing a Python literal list of response values.
        new_x: the x value to predict for.

    :return: dict with ``new_x``, the predicted value ``y_pred`` and the
        fitted ``equation``; or ``({"error": msg}, 400)`` when the input is
        unusable or all x values are identical.
    """
    invalid_input = ({"error": "参数 x 和 y 必须是数值数组"}, 400)

    # float(None) raises TypeError when the field is missing.
    try:
        new_x = float(request.form.get("new_x"))
    except (TypeError, ValueError):
        return {"error": "参数 new_x 必须是数值"}, 400

    # The form values arrive as strings; turn them into Python sequences.
    try:
        x = ast.literal_eval(request.form.get("x"))
        y = ast.literal_eval(request.form.get("y"))
    except (ValueError, SyntaxError, TypeError):
        return invalid_input
    if not isinstance(x, (list, tuple)) or not isinstance(y, (list, tuple)):
        return invalid_input
    if not all(isinstance(v, (int, float)) for v in (*x, *y)):
        return invalid_input

    n = len(x)
    if n != len(y):
        return {"error": "数组 x 和 y 的长度必须相等"}, 400

    # Sums used by the closed-form least-squares solution.
    xy_sum = sum(xi * yi for xi, yi in zip(x, y))
    x_sum = sum(x)
    y_sum = sum(y)
    x_sqr_sum = sum(xi ** 2 for xi in x)

    # Zero when every x is the same (or the input is empty): no line can be fit.
    denominator = n * x_sqr_sum - x_sum ** 2
    if not denominator > 0:
        return {"error": "x 的取值完全相同，无法计算回归系数"}, 400

    # a is the slope and b the intercept of y = a * x + b.
    a = (n * xy_sum - x_sum * y_sum) / denominator
    b = (x_sqr_sum * y_sum - x_sum * xy_sum) / denominator

    # Include the prediction itself in the response.
    y_pred = a * new_x + b
    return {"new_x": new_x,
            "y_pred": y_pred,
            "equation": "y = {:.2f}x + {:.2f}".format(a, b)}

# 测试
# x = [1, 2, 3, 4, 5]
# y = [2, 4, 5, 4, 5]
# new_x = 6
#
# linearRegressionAnalysisPrediction(x, y, new_x)